/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    ClassOrder.java
 *    Copyright (C) 2002-2012 University of Waikato, Hamilton, New Zealand
 *
 */
package weka.filters.supervised.attribute;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import weka.core.WeightedAttributesHandler;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
 * <!-- globalinfo-start --> Changes the order of the classes so that the class
 * values are no longer of in the order specified in the header. The values will
 * be in the order specified by the user -- it could be either in
 * ascending/descending order by the class frequency or in random order.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -R &lt;seed&gt;
 *  Specify the seed of randomization
 *  used to randomize the class
 *  order (default: 1)
 * </pre>
 * 
 * <pre>
 * -C &lt;order&gt;
 *  Specify the class order to be
 *  sorted, could be 0: ascending
 *  1: descending and 2: random.(default: 0)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class ClassOrder extends Filter implements SupervisedFilter, OptionHandler, WeightedAttributesHandler, WeightedInstancesHandler {

    /** for serialization */
    static final long serialVersionUID = -2116226838887628411L;

    /** The seed of randomization */
    private long m_Seed = 1;

    /** The random object */
    private Random m_Random = null;

    /**
     * The 1-1 converting table from the original class values to the new values
     */
    private int[] m_Converter = null;

    /** Class attribute of the data */
    private Attribute m_ClassAttribute = null;

    /** The class order to be sorted */
    private int m_ClassOrder = 0;

    /** The class values are sorted in ascending order based on their frequencies */
    public static final int FREQ_ASCEND = 0;

    /**
     * The class values are sorted in descending order based on their frequencies
     */
    public static final int FREQ_DESCEND = 1;

    /** The class values are sorted in random order */
    public static final int RANDOM = 2;

    /**
     * This class can provide the class distribution in the sorted order as side
     * effect
     */
    private double[] m_ClassCounts = null;

    /**
     * Returns a string describing this filter
     * 
     * @return a description of the filter suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {

        return "Changes the order of the classes so that the class values are " + "no longer of in the order specified in the header. " + "The values will be in the order specified by the user " + "-- it could be either in ascending/descending order by the class " + "frequency or in random order.";
    }

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {

        Vector<Option> newVector = new Vector<Option>(2);

        newVector.addElement(new Option("\tSpecify the seed of randomization\n" + "\tused to randomize the class\n" + "\torder (default: 1)", "R", 1, "-R <seed>"));

        newVector.addElement(new Option("\tSpecify the class order to be\n" + "\tsorted, could be 0: ascending\n" + "\t1: descending and 2: random.(default: 0)", "C", 1, "-C <order>"));

        return newVector.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -R &lt;seed&gt;
     *  Specify the seed of randomization
     *  used to randomize the class
     *  order (default: 1)
     * </pre>
     * 
     * <pre>
     * -C &lt;order&gt;
     *  Specify the class order to be
     *  sorted, could be 0: ascending
     *  1: descending and 2: random.(default: 0)
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        String seedString = Utils.getOption('R', options);
        if (seedString.length() != 0) {
            m_Seed = Long.parseLong(seedString);
        } else {
            m_Seed = 1;
        }

        String orderString = Utils.getOption('C', options);
        if (orderString.length() != 0) {
            m_ClassOrder = Integer.parseInt(orderString);
        } else {
            m_ClassOrder = FREQ_ASCEND;
        }

        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }

        m_Random = null;

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the filter.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {

        Vector<String> options = new Vector<String>();

        options.add("-R");
        options.add("" + m_Seed);
        options.add("-C");
        options.add("" + m_ClassOrder);

        return options.toArray(new String[0]);
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String seedTipText() {
        return "Specify the seed of randomization of the class order";
    }

    /**
     * Get the current randomization seed
     * 
     * @return a seed
     */
    public long getSeed() {
        return m_Seed;
    }

    /**
     * Set randomization seed
     * 
     * @param seed the set seed
     */
    public void setSeed(long seed) {
        m_Seed = seed;
        m_Random = null;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String classOrderTipText() {
        return "Specify the class order after the filtering (0: ascending," + "1: descending, or 2: random)";
    }

    /**
     * Get the wanted class order
     * 
     * @return class order
     */
    public int getClassOrder() {
        return m_ClassOrder;
    }

    /**
     * Set the wanted class order
     * 
     * @param order the class order
     */
    public void setClassOrder(int order) {
        m_ClassOrder = order;
    }

    /**
     * Returns the Capabilities of this filter.
     * 
     * @return the capabilities of this object
     * @see Capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();

        // attributes
        result.enableAllAttributes();
        result.enable(Capability.MISSING_VALUES);

        // class
        result.enable(Capability.NOMINAL_CLASS);

        return result;
    }

    /**
     * Sets the format of the input instances.
     * 
     * @param instanceInfo an Instances object containing the input instance
     *                     structure (any instances contained in the object are
     *                     ignored - only the structure is required).
     * @return true if the outputFormat may be collected immediately
     * @throws Exception if no class index set or class not nominal
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {

        super.setInputFormat(new Instances(instanceInfo, 0));

        m_ClassAttribute = instanceInfo.classAttribute();
        m_Random = new Random(m_Seed);
        m_Converter = null;

        int numClasses = instanceInfo.numClasses();
        m_ClassCounts = new double[numClasses];
        return false;
    }

    /**
     * Input an instance for filtering. Ordinarily the instance is processed and
     * made available for output immediately. Some filters require all instances be
     * read before producing output.
     * 
     * @param instance the input instance
     * @return true if the filtered instance may now be collected with output().
     * @throws IllegalStateException if no input format has been defined.
     */
    @Override
    public boolean input(Instance instance) {

        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }

        // In case some one use this routine in testing,
        // although he/she should not do so
        if (m_Converter != null) {
            Instance datum = (Instance) instance.copy();
            if (!datum.isMissing(m_ClassAttribute)) {
                datum.setClassValue(m_Converter[(int) datum.classValue()]);
            }
            push(datum, false); // No need to copy instance
            return true;
        }

        if (!instance.isMissing(m_ClassAttribute)) {
            m_ClassCounts[(int) instance.classValue()] += instance.weight();
        }

        bufferInput(instance);
        return false;
    }

    /**
     * Signify that this batch of input to the filter is finished. If the filter
     * requires all instances prior to filtering, output() may now be called to
     * retrieve the filtered instances. Any subsequent instances filtered should be
     * filtered based on setting obtained from the first batch (unless the
     * inputFormat has been re-assigned or new options have been set). This
     * implementation sorts the class values and provide class counts in the output
     * format
     * 
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input structure has been defined,
     * @throws Exception             if there was a problem finishing the batch.
     */
    @Override
    public boolean batchFinished() throws Exception {

        Instances data = getInputFormat();
        if (data == null) {
            throw new IllegalStateException("No input instance format defined");
        }

        if (m_Converter == null) {

            // Get randomized indices and class counts
            int[] randomIndices = new int[m_ClassCounts.length];
            for (int i = 0; i < randomIndices.length; i++) {
                randomIndices[i] = i;
            }
            for (int j = randomIndices.length - 1; j > 0; j--) {
                int toSwap = m_Random.nextInt(j + 1);
                int tmpIndex = randomIndices[j];
                randomIndices[j] = randomIndices[toSwap];
                randomIndices[toSwap] = tmpIndex;
            }

            double[] randomizedCounts = new double[m_ClassCounts.length];
            for (int i = 0; i < randomizedCounts.length; i++) {
                randomizedCounts[i] = m_ClassCounts[randomIndices[i]];
            }

            // Create new order. For the moment m_Converter converts new indices
            // into old ones.
            if (m_ClassOrder == RANDOM) {
                m_Converter = randomIndices;
                m_ClassCounts = randomizedCounts;
            } else {
                int[] sorted = Utils.sort(randomizedCounts);
                m_Converter = new int[sorted.length];
                if (m_ClassOrder == FREQ_ASCEND) {
                    for (int i = 0; i < sorted.length; i++) {
                        m_Converter[i] = randomIndices[sorted[i]];
                    }
                } else if (m_ClassOrder == FREQ_DESCEND) {
                    for (int i = 0; i < sorted.length; i++) {
                        m_Converter[i] = randomIndices[sorted[sorted.length - i - 1]];
                    }
                } else {
                    throw new IllegalArgumentException("Class order not defined!");
                }

                // Change class counts
                double[] tmp2 = new double[m_ClassCounts.length];
                for (int i = 0; i < m_Converter.length; i++) {
                    tmp2[i] = m_ClassCounts[m_Converter[i]];
                }
                m_ClassCounts = tmp2;
            }

            // Change the class values
            ArrayList<String> values = new ArrayList<String>(data.classAttribute().numValues());
            for (int i = 0; i < data.numClasses(); i++) {
                values.add(data.classAttribute().value(m_Converter[i]));
            }
            ArrayList<Attribute> newVec = new ArrayList<Attribute>(data.numAttributes());
            for (int i = 0; i < data.numAttributes(); i++) {
                if (i == data.classIndex()) {
                    newVec.add(new Attribute(data.classAttribute().name(), values, data.classAttribute().getMetadata()));
                } else {
                    newVec.add(data.attribute(i));
                }
            }
            Instances newInsts = new Instances(data.relationName(), newVec, 0);
            newInsts.setClassIndex(data.classIndex());
            setOutputFormat(newInsts);

            // From now on we need m_Converter to convert old indices into new ones
            int[] temp = new int[m_Converter.length];
            for (int i = 0; i < temp.length; i++) {
                temp[m_Converter[i]] = i;
            }
            m_Converter = temp;

            // Process all instances
            for (int xyz = 0; xyz < data.numInstances(); xyz++) {
                Instance datum = data.instance(xyz);
                if (!datum.isMissing(datum.classIndex())) {
                    datum.setClassValue(m_Converter[(int) datum.classValue()]);
                }
                push(datum, false); // No need to copy instance.
            }
        }
        flushInput();
        m_NewBatch = true;
        return (numPendingOutput() != 0);
    }

    /**
     * Get the class distribution of the sorted class values. If class is numeric it
     * returns null
     * 
     * @return the class counts
     */
    public double[] getClassCounts() {

        if (m_ClassAttribute.isNominal()) {
            return m_ClassCounts;
        } else {
            return null;
        }
    }

    /**
     * Convert the given class distribution back to the distributions with the
     * original internal class index
     * 
     * @param before the given class distribution
     * @return the distribution converted back
     */
    public double[] distributionsByOriginalIndex(double[] before) {

        double[] after = new double[m_Converter.length];
        for (int i = 0; i < m_Converter.length; i++) {
            after[i] = before[m_Converter[i]];
        }

        return after;
    }

    /**
     * Return the original internal class value given the randomized class value,
     * i.e. the string presentations of the two indices are the same. It's useful
     * when the filter is used within a classifier so that the filtering procedure
     * should be transparent to the evaluation
     * 
     * @param value the given value
     * @return the original internal value, -1 if not found
     * @throws Exception if the coverter table is not set yet
     */
    public double originalValue(double value) throws Exception {

        if (m_Converter == null) {
            throw new IllegalStateException("Coverter table not defined yet!");
        }

        for (int i = 0; i < m_Converter.length; i++) {
            if ((int) value == m_Converter[i]) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Main method for testing this class.
     * 
     * @param argv should contain arguments to the filter: use -h for help
     */
    public static void main(String[] argv) {
        runFilter(new ClassOrder(), argv);
    }
}
